## CLEANING WORKENVIRONMENT.
# Clearing plots.
invisible(if(!is.null(dev.list())) dev.off())
# Cleaning workspace.
rm(list=ls())
# Cleaning console.
cat("\014")## SILENCING MESSAGES AND WARNINGS.
knitr::opts_chunk$set(echo = TRUE,
message = FALSE,
warning = FALSE)
## ADDING SOME LAYOUT SETTINGS.
# Deploying figures and centering them by default.
knitr::opts_chunk$set(out.width = "100%",
fig.align = "center")
# Facilitating table layout in HTML.
options(knitr.table.format = "html")
## COLOR ARRANGEMENT
deep_blue <- "#0072B2"
black <- "black"
white <- "white"
market_color_palette <-
c("MEX" = "#5050ff",
"UK" = deep_blue,
"US" = "#74c8d4")
respondent_color_palette <-
c("Detractors" = "#e9617c",
"Passives" = "#b1b1b1",
"Promoters" = "#b1d961")This working paper is comprised of interactive visualization and analysis of NPS data. It is about market segmentation, time evolution – per quarter, per month, and per week –, and cohort analysis.
Data originate from a Kaggle “dataset from 2021, generated using real distributions of NPS data from a retail bank.” Sources are “Synthetic using real distributions of data”. Data and comments can be found at Kaggle.
It is “A dataset that has customer level data with responses to the question ‘how likely are you to recommend X to your friends and family’?”
Developments are made in R and R Markdown. Input data are in XLSX format. Output is produced in both HTML and PowerPoint format.
All documents and files from this project are in my NPS GitHub repository. Any use of them must comply with Kaggle requirements, among others with the Apache 2.0 open source license referred to on the Kaggle site.
TAGS: NPS interactive visualization, NPS analysis, market segmentation, time analysis, cohort analysis, R, R Markdown, XLSX, HTML, PPTX
# PACKAGES ASSOCIATED WITH TIDYVERSE.
if(!require(tidyverse)) install.packages("tidyverse", repos = "http://cran.us.r-project.org")
if(!require(tidyverse)) install.packages("readxl", repos = "http://cran.us.r-project.org")
if(!require(scales)) install.packages("scales", repos = "http://cran.us.r-project.org")
if(!require(lubridate)) install.packages("lubridate", repos = "http://cran.us.r-project.org")
if(!require(ggformula)) install.packages("ggformula", repos = "http://cran.us.r-project.org")
# PACKAGES ASSOCIATED WITH R MARKDOWN.
if(!require(knitr)) install.packages("knitr", repos = "http://cran.us.r-project.org")
if(!require(kableExtra)) install.packages("kableExtra", repos = "http://cran.us.r-project.org")
if(!require(gridExtra)) install.packages("gridExtra", repos = "http://cran.us.r-project.org")
# PACKAGES ASSOCIATED WITH INTERACTIVE TABLES AND GRAPHS.
if(!require(htmltools)) install.packages("htmltools", repos = "http://cran.us.r-project.org")
if(!require(shiny)) install.packages("shiny", repos = "http://cran.us.r-project.org")
if(!require(httpuv)) install.packages("httpuv", repos = "http://cran.us.r-project.org")
if(!require(xtable)) install.packages("xtable", repos = "http://cran.us.r-project.org")
if(!require(sourcetools)) install.packages("sourcetools", repos = "http://cran.us.r-project.org")
if(!require(fastmap)) install.packages("fastmap", repos = "http://cran.us.r-project.org")
if(!require(DT)) install.packages("DT", repos = "http://cran.us.r-project.org")
if(!require(plotly)) install.packages("plotly", repos = "http://cran.us.r-project.org")
# Package to download and install packages from Remote Repositories.
if(!require(remotes)) install.packages("remotes", repos = "http://cran.us.r-project.org")
# Package for colors
if(!require(RColorBrewer)) install.packages("RColorBrewer", repos = "http://cran.us.r-project.org")
# REQUIRING LIBRARIES.
library(tidyverse)
library(readxl)
library(scales)
library(lubridate)
library(ggformula)
library(knitr)
library(kableExtra)
library(gridExtra)
library(htmltools)
library(shiny)
library(httpuv)
library(xtable)
library(sourcetools)
library(fastmap)
library(DT)
library(plotly)
library(remotes)
library(RColorBrewer)
# FROM GITHUB
# Basic
if(!require(githubinstall)) install.packages("githubinstall", repos = "http://cran.us.r-project.org")
library(githubinstall)
# Tool to export graphs from R to PPTX, etc.
if(!require(export)) remotes::install_github("tomwenseleers/export")
library(export)Let us extract some general preemptive insights from data before diving into market segmentation.
The graph below shows NPS responses from a random sample, which is used instead of the whole dataset to prevent overplotting.
# Uploading data that have already been downloaded from Kaggle.
data <- read_csv("E:/DS/R/86_NPS/NPStimeseries.csv") %>%
as.data.frame()
temporary_copy <- data
# New vector of column names
names <- c(
"ID",
"market",
"survey_date",
"customer_name",
"month",
"quarter",
"NPS")
# New factor levels for quarter and months
quarter_levels <-
c("Q1", "Q2", "Q3", "Q4")
month_levels <- month.abb[1:12]
# Renaming some columns, converting some character vectors into factors,
# and changing some factor levels.
data <- data %>%
`colnames<-`(names) %>%
mutate(market = as.factor(market)) %>%
mutate(formatted_date = as.Date(survey_date, "%d/%m/%Y")) %>%
mutate(week_nr = week(formatted_date)) %>%
mutate(respondent_category = ifelse(NPS > 8, "Promoters",
ifelse(NPS > 6, "Passives", "Detractors"))) %>%
mutate(respondent_category = as.factor(respondent_category)) %>%
mutate(quarter_name = factor(quarter, labels = quarter_levels)) %>%
mutate(month_name = factor(month, labels = month_levels))
df <- data %>%
group_by(month_name, NPS) %>%
summarize(`Responses Count` = n()) %>%
ungroup()
graph <- ggplot(df, aes(month_name, NPS)) +
geom_tile(aes(fill = `Responses Count`)) +
scale_fill_distiller(palette = "YlGnBu", direction = 1) +
labs(title = "Dispersion of NPS Responses",
x = "",
y = "NPS Scale") +
scale_y_continuous(breaks = pretty_breaks()) +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 14, face = "bold"),
axis.text.y = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white))
# Exporting static graph to the PowerPoint document.
graph2ppt(graph, "NPS.pptx", width = 10, height = 7)
p <- ggplotly(graph)
# Clarifies hover information.
for (i in 1:12) {
p$x$data[[1]]$text[,i] <-
str_replace_all(p$x$data[[1]]$text[,i], "month_name:\\s+", "")
p$x$data[[1]]$text[,i] <-
str_replace_all(p$x$data[[1]]$text[,i], "NPS", "NPS Level")
}
pWe had a swift look at the distribution of the NPS responses. Let us now have a look at the data as they stand in the original file.
# A static table will be exported to the PowerPoint document. Assigning it
# to a dummy variable prevents printing it in the HTML document Rmd.html.
df <- head(temporary_copy, 6)
dustbin <- table2ppt(df, "NPS.pptx", digits = 0,
height = 6, pointsize = 16, append = TRUE)
# The second table is an interactive one that will be printed
# in the HTML document NPS.html when knitting NPS.Rmd.
# JavaScript extension to color/background color the table header.
initComplete <-
c("function(settings, json) {
",
"$(this.api().table().header()).css({
'background-color': '#0072B2', 'color': 'white'
});",
"}
")
# This JavaScript extension Will color/background color the body text.
rowCallback <-
c("function(row, data, index, rowId) {
",
"console.log(rowId)",
"if(rowId >= 0) {
",
"row.style.backgroundColor = 'white',
row.style.color = '#0072B2';",
"}",
"}
")
# Using DT package to create and print the interactive table.
datatable(temporary_copy, rownames = FALSE, filter = "top",
options =
list(pageLength = 6, scrollX = T,
columnDefs = list(list(className = 'dt-center',
targets = 0:(ncol(temporary_copy) - 1))),
initComplete = JS(initComplete),
rowCallback = JS(rowCallback)))In the table above, each NPS score is characterized by
Some global statistics will be derived and included in the next table.
# temporary_copy is removed here because removing it at the end
# of the previous code chunk would create an additional button on the
# right-hand side of the HTML document.
rm(temporary_copy)
# Some global statistics
nr_responses <- nrow(data)
nr_respondents <- length(unique(data$customer_name))
first_survey_date <- min(data$formatted_date)
last_survey_date <- max(data$formatted_date)
# Computing aggregate NPS score from all markets over the whole period.
aggregate_NPS_score <- data %>%
group_by(respondent_category) %>%
summarise(n = n()) %>%
spread(respondent_category, n) %>%
mutate(total = Detractors + Passives + Promoters) %>%
mutate(x = round((Promoters - Detractors) * 100 / total), 0) %>%
.$x
# Constructing presentation table for global statistics.
tab <- data.frame(nr_responses, nr_respondents,
first_survey_date, last_survey_date,
aggregate_NPS_score) %>%
`colnames<-`(c("Responses",
"Respondents",
"First Response",
"Last Response",
"Aggregate NPS Score"))
# Exporting static table to PowerPoint. Assigning it to a dummy variable
# prevents printing it in the HTML document Rmd.html.
dustbin <- table2ppt(tab, "NPS.pptx", digits = 0,
height = 2, pointsize = 18, append = TRUE)
# Printing interactive table in NPS.html document when knitting NPS.Rmd.
knitr::kable(tab, align = "c", table.attr = "class = 'bg-primary'") %>%
kable_styling()| Responses | Respondents | First Response | Last Response | Aggregate NPS Score |
|---|---|---|---|---|
| 5000 | 4829 | 2021-01-01 | 2021-12-30 | 12 |
The number of responses is 5,000, which seems rather significant from a statistical point of view.
The number of respondents of 4,829. Some respondents have responded more than once. NPS responses from this subgroup will be further investigated through a cohort analysis to see how responses evolve over time for the same people in this survey.
There are three markets, identified as countries.
The survey data cover the whole period of 2021; whether responses are spread equally over time will be checked.
The aggregate NPS score is 12 for the three markets taken together over the whole period of 2021. The aggregate NPS score has been computed on all responses together. The result is the same after rounding if the aggregate NPS score is first calculated for each market and for each month before averaging the 36 partial results. Without rounding, the difference is just 0.17 between both methods. The minute difference is due to volume effects: if for instance one month on one market has more responses, then its weight in the global result is bigger when the aggregate NPS is computed on all responses taken together.
Let us dive into market segmentation.
For each of the three markets, four developments are going to be produced:
The NPS responses total 5,000. How do they break down by market?
tab<- data %>%
group_by(market) %>%
summarize(n = n()) %>%
`colnames<-`(c("Market", "Number of Responses"))
# Exporting the table to PowerPoint. Assigning it to a dummy variable
# prevents it being printed in the HTML document Rmd.html as well.
dustbin <- table2ppt(tab, "NPS.pptx", height = 3, offx = 3,
pointsize = 20, append = TRUE)
# Printing the table into the HTML document NPS.html when knitting NPS.Rmd.
knitr::kable(tab, align = "c", table.attr = "class = 'bg-primary'") %>%
kable_styling()| Market | Number of Responses |
|---|---|
| MEX | 1649 |
| UK | 1720 |
| US | 1631 |
Responses are almost evenly broken down by market, namely approximately one third for each market, with the UK market representing one percentage point more than the other two markets. Consequently, we know that the three markets contributed the same quantitative influence to the aggregate NPS score of 12 reached by the threee markets together (see table above).
Actually, the NPS responses have been generated from real distributions. Their numbers are not the actual numbers of responses to surveys, which we don’t know. We cannot compare them to recommendations such as these recommendations.
We do not know the response rate either.
Let us turn now, for each market, to the breakdown by respondent category.
df <- data %>%
group_by(market, respondent_category) %>%
summarize(n = n()) %>%
spread(respondent_category, n) %>%
mutate(total = Promoters + Passives + Detractors) %>%
mutate(aggregate_NPS_score =
round(((Promoters - Detractors) * 100 / total)), 0)
graph <- df %>%
ggplot() +
geom_bar(aes(x = market, y = aggregate_NPS_score, fill = market),
width = 1, stat = "identity") +
scale_fill_manual(values = market_color_palette) +
labs(title = "Aggregate NPS Score by Market",
fill = "",
y = "Aggregate NPS Score \n") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white))
# Exporting static graph to the PowerPoint document.
graph2ppt(graph, "NPS.pptx", width = 10, height = 7, append = TRUE)
# Exporting the interactive graph to the HTMl document.
p <- ggplotly(graph, height = 360, width = 540) %>%
layout(legend = list(orientation = "h", x = 0.3, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifies hover information.
for (i in 1:3) {
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text,
"market",
"Market")
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text,
"aggregate_NPS_score",
"Aggregate NPS Score")
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text,
"(\\d+)(.+)",
"\\1")
}
# Centers the graph, because the centering opts_chunk previously
# inserted has not always proved fully operative with function ggplotly().
htmltools::div(p)On the three markets, the retail bank reaches positive aggregate NPS scores, limited though. With an aggregate NPS score of 17, the MEX market is hovering above the other two markets. The US market is at 10 and the UK market at 8.
17 is very close to an average NPS score of 19 published by Qualtrics for the banking industry in the US in 2021: Economics of NPS in the Banking Industry - Qualtrics https://www.qualtrics.com › uploads › 2021/03.
10 is below that average.
But we do not know the real profile of the retail bank whose NPS response distributions have been used to generate the dataset.
The next set of graphs breaks NPS responses down by market and by respondent category.
graph <- data %>%
ggplot(aes(x = respondent_category, fill = respondent_category)) +
geom_bar(width = 1) +
facet_grid(.~market) +
scale_fill_manual(values = respondent_color_palette) +
labs(title = "Number of Responses by Category",
fill = "",
y = "Number of Responses") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 12, face = "bold"),
strip.background = element_rect(fill = white),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 14),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white),
panel.spacing = unit(2, "lines"))
# Creating the interactive graph to the HTMl document.
graph2ppt(x = graph, file = "NPS.pptx",
width = 10, height = 7, append = TRUE)
# Exporting the interactive graph to the HTMl document.
p <- ggplotly(graph, height = 400) %>%
layout(legend = list(orientation = "h", x = 0.2, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information.
for (i in 1:9) {
# Changing order of information pieces and discarding duplicates.
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text,
"(^count:\\s\\d+)(<br\\s/>)([[:alpha:]_]+:\\s[:alpha:]+)(<br\\s/>)(.+$)",
"\\3\\2\\1")
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text,
"respondent_category",
"Respondent Category")
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text,
"count",
"Number of Responses")
}
# Printing interactive graph in HTMP document.
pThe MEX market has approximately the same number of passives as the UK market. The difference in aggregate NPS score comes mainly from the detractors (100 detractors less) and secondarily from the promoters (36 promoters more). This explains the difference in aggregate NPS score: 17 for the MEX market, 8 for the UK market.
The US market has an aggregate NPS score of 10, against 8 for the UK market. In fact, the US market has a little more promoters (+ 8) and a little bit detractors (- 8). The lower number of passives along the US side also explains one third of the difference between the two markets.
The next graph gives the percentage of promoters and detractors by market.
df <- data %>%
select(market, respondent_category) %>%
group_by(market, respondent_category) %>%
summarize(n = n()) %>%
spread(respondent_category, n) %>%
mutate(total = Promoters + Passives + Detractors) %>%
mutate(Promoters = round(Promoters * 100 / total, 0)) %>%
mutate(Passives = round(Passives * 100 / total, 0)) %>%
mutate(Detractors = round(Detractors * 100 / total, 0)) %>%
select(market, Promoters, Passives, Detractors) %>%
gather(respondent_category, percentage,
"Promoters":"Passives":"Detractors") %>%
mutate(respondent_category = as.factor(respondent_category)) %>%
mutate(respondent_category =
fct_reorder(respondent_category, desc(respondent_category)))
graph <- df %>%
ggplot(aes(x = market, y = percentage, fill = respondent_category)) +
geom_col(width = 0.6) +
scale_fill_manual(values = respondent_color_palette) +
labs(title = "Respondent Categories in %",
fill = "",
y = "Percentage") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 12, face = "bold"),
strip.background = element_rect(fill = white),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 14),
axis.text.x = element_text(size = 14, face = "bold"),
axis.text.y = element_blank(),
legend.position = "none",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white))
# Creating the interactive graph to the HTMl document.
graph2ppt(x = graph, file = "NPS.pptx",
width = 10, height = 7, append = TRUE)
# Exporting the interactive graph to the HTMl document.
p <- ggplotly(graph, height = 360, width = 540) %>%
layout(legend = list(orientation = "h", x = 0.25, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information.
for (i in 1:3) {
for (j in 1:3) {
p$x$data[[i]]$text[j] <- str_replace(
p$x$data[[i]]$text[j],
"(^[[:alpha:]:\\s]+)(<br\\s+/>)([[:alnum:]:\\s]+)(<br\\s+/>)(.+$)",
"\\1\\2\\5\\4\\3"
)
p$x$data[[i]]$text[j] <-
str_replace(p$x$data[[i]]$text[j], "market", "Market")
p$x$data[[i]]$text[j] <-
str_replace(p$x$data[[i]]$text[j],
"respondent_category",
"Respondent Category")
p$x$data[[i]]$text[j] <-
str_replace(p$x$data[[i]]$text[j], "percentage", "Percentage")
p$x$data[[i]]$text[j] <-
str_replace(p$x$data[[i]]$text[j], "(\\d+$)", "\\1 %")
}
}
# Printing interactive graph in HTMP document.
pThe MEX market is better in both respondent categories: higher percentage of promoters, with 48 %, and lower percentage of detractors, with 31 %.
The US market is almost as good in promoters but with a substantially higher percentage of detractors, namely 37 %.
df <- data %>%
select(market, NPS) %>%
group_by(market, NPS) %>%
summarize(n = n()) %>%
mutate(NPS = as.factor(NPS))
# Color vector for the 11 NPS score levels
det <- replicate(7, "#e9617c")
pas <- replicate(2, "#ffc862")
prom <- replicate(2, "#b1d961")
col_11 <- c(det, pas, prom)
graph <- df %>%
ggplot(aes(x = NPS, y = n, fill = NPS)) +
geom_col(width = 1) +
scale_fill_manual(values = col_11) +
facet_grid(.~market) +
labs(title = "Detailed NPS Levels",
fill = "",
y = "Number of Responses") +
scale_x_discrete(breaks = pretty_breaks(n = 6)) +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 14, face = "bold"),
strip.background = element_rect(fill = white),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 16, face = "bold"),
axis.text.x = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "none",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white),
panel.spacing = unit(2, "lines"))
# Exporting static graph to NPS.pptx.
graph2ppt(graph, "NPS.pptx", width = 10, height = 7, append = TRUE)
# Creating interactive graph for NPS.htmp.
p <- ggplotly(graph, height = 350) %>%
layout(legend = list(orientation = "h", x = 0.25, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information.
for (i in 1:33) {
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "<br\\s/>NPS:\\s\\d+$", "")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "NPS", "NPS Score")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "n", "Number of Responses")
}
# Printing interactive graph in NPS.html when knitting NPS.Rmd.
pConcentration at the extremes for MEX. Higher in number and even more in percentage with respect to the UK.
Respondent Categories
df <- data %>%
group_by(market, quarter, respondent_category) %>%
summarize(n = n()) %>%
spread(respondent_category, n) %>%
mutate(total = Promoters + Passives + Detractors) %>%
mutate(Promoters = Promoters * 100 / total) %>%
mutate(Passives = Passives * 100 / total) %>%
mutate(Detractors = Detractors * 100 / total) %>%
mutate(nps = round(Promoters - Detractors, 0)) %>%
select(market, quarter, Promoters, Passives, Detractors, nps) %>%
gather(respondent_category, percentage,
"Promoters":"Passives":"Detractors") %>%
mutate(respondent_category = as.factor(respondent_category))
graph <- df %>%
ggplot() +
geom_col(aes(x = quarter, y = percentage,
fill = respondent_category), width = 0.6) +
geom_point(aes(x = quarter, y = nps),
shape = 16, size = 3, color = black) +
geom_line(aes(x = quarter, y = nps),
color = deep_blue) +
scale_fill_manual(values = respondent_color_palette) +
facet_grid(.~market) +
labs(title = "Quarterly Trend of Aggregate NPS Score (•)",
fill = "",
y = "") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 14, face = "bold"),
strip.text.y = element_text(size = 14, face = "bold"),
strip.background = element_rect(fill = white),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 16, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white),
panel.spacing = unit(2, "lines"))
# Creating static graph for the NPS.pptx document.
graph2ppt(x = graph, file = "NPS.pptx",
width = 10, height = 7, append = TRUE)
# Creating interactive graph for the NPS.html document.
p <- ggplotly(graph, height = 400) %>%
layout(legend = list(orientation = "h", x = 0.2, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information about respondent categories.
for (i in 1:9) {
# Reordering hover information pieces.
p$x$data[[i]]$text <- str_replace_all(p$x$data[[i]]$text,
"([[:alpha:]_:\\s\\d]+)(<br\\s/>)([[:alpha:]_:\\s\\d\\.]+)(<br\\s/>)(.+)",
"\\5\\4\\1\\2\\3 %")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"respondent_category:\\s+",
"")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"quarter_name:\\s+Q",
" Quarter ")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"percentage:\\s+",
" ")
}
# Rounding percentages in hover information.
for (i in 1:9) {
for (j in 1:4) {
per <- str_extract(p$x$data[[i]]$text[j], "\\d+\\.\\d+")
per2 <- as.character(round(as.numeric(per), 0))
p$x$data[[i]]$text[j] <-
str_replace(p$x$data[[i]]$text[j], per, per2)
}
}
# Clarifying hover information about aggregate NPS score.
p$x$data[[10]]$text <-
str_replace_all(p$x$data[[10]]$text,
"quarter_name:\\s+Q",
"Quarter ")
p$x$data[[10]]$text <-
str_replace_all(p$x$data[[10]]$text,
"nps",
"Aggregate NPS Score")
# Printing interactive graph in NPS.html when knitting NPS.Rmd.
pdf <- data %>%
group_by(market, month_name, respondent_category) %>%
summarize(n = n()) %>%
spread(respondent_category, n) %>%
mutate(total = Promoters + Passives + Detractors) %>%
mutate(Promoters = Promoters * 100 / total) %>%
mutate(Passives = Passives * 100 / total) %>%
mutate(Detractors = Detractors * 100 / total) %>%
mutate(nps = round(Promoters - Detractors, 0)) %>%
select(market, month_name, Promoters, Passives, Detractors, nps) %>%
gather(respondent_category, percentage,
"Promoters":"Passives":"Detractors") %>%
mutate(respondent_category = as.factor(respondent_category))
graph <- df %>%
ggplot() +
geom_col(aes(x = month_name, y = percentage,
fill = respondent_category), width = 0.6) +
geom_point(aes(x = month_name, y = nps),
shape = 16, size = 2, color = black) +
scale_fill_manual(values = respondent_color_palette) +
facet_grid(.~market) +
labs(title = "Monthly Trend of Aggregate NPS Score (•)",
fill = "",
y = "") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 14, face = "bold"),
strip.text.y = element_text(size = 14, face = "bold"),
strip.background = element_rect(fill = white),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 16, face = "bold"),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = white),
panel.spacing = unit(2, "lines"))
# Creating static graph for the NPS.pptx document.
graph2ppt(x = graph, file = "NPS.pptx",
width = 10, height = 7, append = TRUE)
# Creating interactive graph for the NPS.html document.
p <- ggplotly(graph, height = 400) %>%
layout(legend = list(orientation = "h", x = 0.2, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information about respondent categories.
for (i in 1:9) {
# Reordering hover information pieces.
p$x$data[[i]]$text <- str_replace_all(p$x$data[[i]]$text,
"([[:alpha:]_:\\s\\d]+)(<br\\s/>)([[:alpha:]_:\\s\\d\\.]+)(<br\\s/>)(.+)",
"\\5\\4\\1\\2\\3 %")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"respondent_category:\\s+",
"")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"month_name:\\s+",
" ")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"percentage:\\s+",
" ")
}
# Rounding percentages in hover information.
for (i in 1:9) {
for (j in 1:12) {
per <- str_extract(p$x$data[[i]]$text[j], "\\d+\\.\\d+")
per2 <- as.character(round(as.numeric(per), 0))
p$x$data[[i]]$text[j] <-
str_replace(p$x$data[[i]]$text[j], per, per2)
}
}
# Clarifying hover information about aggregate NPS score.
for (i in 10:12) {
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "month_name:\\s+", "")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "nps", " NPS")
}
# Printing interactive graph in NPS.html when knitting NPS.Rmd.
pActually, measuring evolution of NPS per month might lack precision. Maybe an upward move or a downward one started in the middle of a month and ended in the middle of another month.
Consequently, let’s visualize NPS evolution on a day basis and spline it according to real changes.
df <- data %>%
group_by(market, week_nr, respondent_category) %>%
summarize(n = n()) %>%
spread(respondent_category, n) %>%
mutate(total = Promoters + Passives + Detractors) %>%
mutate(aggregate_NPS_score =
round((Promoters - Detractors) * 100 / total, 0))
graph <- df %>%
ggplot(aes(x = week_nr, y = aggregate_NPS_score,
color = market)) +
geom_point() +
geom_smooth() +
scale_color_manual(values = market_color_palette) +
facet_grid(.~market) +
labs(title = "Weekly Trend of Aggregate NPS Score",
color = "",
y = "Aggregate NPS Score") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 14, face = "bold"),
strip.background = element_rect(fill = "lightgray"),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 14, face = "bold"),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "none",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = "lightgray"),
panel.spacing = unit(2, "lines"))
graph2ppt(x = graph, file = "NPS.pptx",
width = 10, height = 7, append = TRUE)
p <- ggplotly(graph, height = 350) %>%
layout(legend = list(orientation = "h", x = 0.3, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information about aggregate NPS score.
for (i in 1:3) {
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "week_nr", "Week")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"aggregate_NPS_score",
" NPS")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "<br\\s+/>market.+$", "")
}
# Suppressing hover information about loess for clarity reasons.
for (i in 4:9) {
p$x$data[[i]]$hoverinfo <- "none"
}
# Printing interactive graph in NPS.html document when knitting NPS.Rmd.
pEvolution of promoters and detractors per week
df <- data %>%
group_by(market, week_nr, respondent_category) %>%
summarize(n = n())
graph <- df %>%
ggplot(aes(x = week_nr, y = n, color = respondent_category)) +
scale_color_manual(values = respondent_color_palette) +
geom_point(aes(size = n)) +
scale_size_continuous(range = c(0.5, 3)) +
geom_smooth() +
facet_grid(respondent_category~market) +
labs(title = "Responses by Respondent Category per Week",
color = "",
y = "Number of Responses Number of Responses Number of Responses") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 14, face = "bold"),
strip.text.y = element_text(size = 14, face = "bold"),
strip.background = element_rect(fill = "lightgray"),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 14),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "none",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = "lightgray"),
panel.spacing = unit(2, "lines"))
graph2ppt(x = graph, file = "NPS.pptx",
width = 10, height = 7, append = TRUE)
p <- ggplotly(graph, height = 800) %>%
layout(legend = list(orientation = "h", x = 0.25, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information about aggregate NPS score.
for (i in 1:27) {
# Removing duplicated number of responses in the first 9 text blocks
# with no impact on the next text blocks.
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "<br\\s/>n:\\s+\\d+$", "")
# Changing order among info pieces.
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"([[:alnum:]_:\\s\\.]+)(<br\\s/>)([[:alnum:]:\\s\\.]+)(<br\\s/>)(.+$)",
"\\5\\4\\1\\2\\3")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "respondent_category:\\s+", "")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "week_nr", "Week Number")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "n:", "Number of Responses:")
}
# Suppressing hover information about loess for clarity reasons.
for (i in 10:27) {
p$x$data[[i]]$hoverinfo <- "none"
}
# Printing interactive graph in NPS.html document when knitting NPS.Rmd.
pWith the respondents who responded more than once, let us build up cohorts on a monthly basis. All respondents will be regrouped according to their first response month.
First aggregate nps scores.
names_mex <- data %>%
filter(market == "MEX") %>%
arrange(customer_name) %>%
filter(!duplicated(customer_name)) %>%
.$customer_name
names_uk <- data %>%
filter(market == "UK") %>%
arrange(customer_name) %>%
filter(!duplicated(customer_name)) %>%
.$customer_name
names_us <- data %>%
filter(market == "US") %>%
arrange(customer_name) %>%
filter(!duplicated(customer_name)) %>%
.$customer_name
# Creating receptacle data frames for storing cohort details
# generated by next for loop. These data frames will also be used
# in the next code chunk.
df_cohorts_MEX <- data.frame(x = 1:2, y = 1:2)
df_cohorts_UK <- data.frame(x = 1:2, y = 1:2)
df_cohorts_US <- data.frame(x = 1:2, y = 1:2)
# Creating receptacle data frames for storing NPS details
# generated by next for loop. These data frames will be used
# in the facet grid procedure below in this code chunk.
df_nps_MEX <- data.frame(x = 1:2, y = 1:2)
df_nps_UK <- data.frame(x = 1:2, y = 1:2)
df_nps_US <- data.frame(x = 1:2, y = 1:2)
# for loop in order to populate cohorts and
# calculate aggregate NPS scores.
iterable <- c("MEX", "UK", "US")
for (current_market in iterable) {
# Building up the 3 cohorts on a quarterly basis.
df_cohorts <- data %>%
select(customer_name, quarter, quarter_name,
respondent_category, market) %>%
filter(market == current_market) %>%
group_by(customer_name) %>%
mutate(cohort = min(quarter)) %>%
ungroup() %>%
filter(cohort < 4) %>%
mutate(cohort_name = factor(cohort,
labels = c("Cohort Starting in Q1", "Cohort Starting in Q2",
"Cohort Starting in Q3"))) %>%
mutate(market = current_market) %>%
select(market, cohort, cohort_name, quarter,
customer_name, respondent_category)
# Saving df_cohorts outside of the local scope.
# This will be used further on in the next code chunk.
assign(paste("df_cohorts_", current_market, sep = ""),
df_cohorts)
# Calculating aggregate NPS by cohort.
df_nps <- df_cohorts %>%
select(- customer_name) %>%
group_by(cohort_name, quarter, respondent_category,
.drop = FALSE) %>%
summarize(n = n()) %>%
ungroup() %>%
spread(respondent_category, n) %>%
mutate(total = Detractors + Passives + Promoters) %>%
mutate(aggregate_NPS_score =
round((Promoters - Detractors) * 100 / total, 0)) %>%
mutate(cohort = str_sub(cohort_name, -1)) %>%
mutate(market = current_market) %>%
select(market, cohort, cohort_name, quarter,
aggregate_NPS_score)
# Saving df_nps outside of the local scope.
assign(paste("df_nps_", current_market, sep = ""), df_nps)
}
# Binding the 3 df_nps... data frames for facet grid.
df_nps<- rbind(df_nps_MEX, df_nps_UK, df_nps_US)
# Creating graph with aggregate NPS score by market, cohort,
# and response quarter.
cohort_graph <- df_nps %>%
filter(cohort %in% 1:2) %>%
ggplot(aes(quarter, aggregate_NPS_score, color = cohort_name)) +
scale_color_manual(values = c("gray", black)) +
geom_line(linetype = "dotted", size = 1) +
geom_point(aes(size = cohort_name), show.legend = FALSE) +
scale_size_manual(values = c(2.5, 1.5)) +
facet_grid(.~market) +
labs(title = "Aggregate NPS Score by Cohort",
linetype = "",
shape = "",
color = "",
y = "Aggregate NPS Score") +
theme(
plot.title = element_text(hjust = 0.5, vjust = 2,
size = 16, face = "bold"),
strip.text.x = element_text(size = 12, face = "bold"),
strip.background = element_rect(fill = "lightgray"),
axis.title.x = element_blank(),
axis.title.y = element_text(vjust = 2, size = 14),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 14, face = "bold"),
legend.position = "bottom",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_rect(fill = white),
plot.background = element_rect(fill = "lightgray"),
panel.spacing = unit(2, "lines"))
graph2ppt(cohort_graph, "NPS.pptx",
width = 10, height = 8, append = TRUE)
p <- ggplotly(cohort_graph, height = 400) %>%
layout(legend = list(orientation = "h", x = 0.15, y = -0.1),
hoverlabel = list(bordercolor = white))
# Clarifying hover information about aggregate NPS score.
for (i in 1:12) {
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"([[:alnum:]-:\\s]+)(<br\\s+/>)([[:alnum:]-_:\\s]+)(<br\\s+/>)(.+$)",
"\\5\\4\\1\\2\\3")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "cohort_name:\\s+", "")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text, "quarter", "Responses Quarter")
p$x$data[[i]]$text <-
str_replace_all(p$x$data[[i]]$text,
"aggregate_NPS_score",
"Aggregate NPS Score")
}
for (i in 7:12) {
# Suppresses duplicated cohort name.
p$x$data[[i]]$text <-
str_replace(p$x$data[[i]]$text, "[[:alnum:]\\s<]+/>", "")
}
for (i in 1:6) {
# Suppresses the size layer in the legend, which has been hidden in ggplot2
# but reappers in ggplotly.
p$x$data[[i]]$showlegend <- FALSE
}
pRetention of respondents
# Creating for loop to generate retention table for the 3 markets.
counter <- 0
for (current_market in iterable) {
# Counting respondents per market, cohort, and quarter.
df_respondents <-
eval(parse(text = paste0("df_cohorts_", current_market))) %>%
mutate(user_lifetime = quarter - as.numeric(cohort)) %>%
mutate(key = paste0(cohort, quarter, customer_name)) %>%
arrange(key) %>%
filter(!duplicated(key)) %>%
mutate(user_lifetime = as.factor(user_lifetime))
# Computing retention rates per cohort.
df_retention <- df_respondents %>%
group_by(cohort_name, user_lifetime, .drop = FALSE) %>%
summarize(n = n()) %>%
ungroup() %>%
pivot_wider(names_from = user_lifetime, values_from = n) %>%
`colnames<-`(c("Cohort", paste0("c", 0:3))) %>%
mutate(c1 = label_percent(accuracy = 0.01,
suffix = " %")(c1 / c0)) %>%
mutate(c2 = label_percent(accuracy = 0.01,
suffix = " %")(c2 / c0)) %>%
mutate(c3 = label_percent(accuracy = 0.01,
suffix = " %")(c3 / c0)) %>%
`colnames<-`(c(paste0("Respondent Cohort in ", current_market, " Market"),
"Respondents in Join Quarter",
"Retention in Join Quarter + 1",
"Retention in Join Quarter + 2",
"Retention in Join Quarter + 3"))
# Formatting the triangular chart in the retention table.
df_retention[3, 4] <- ""
df_retention[2:3, 5] <- ""
# Exporting static table to PowerPoint. Assigning it to a dummy variable
# prevents printing it in the HTML document Rmd.html.
dustbin <- table2ppt(df_retention, "NPS.pptx", digits = 0,
height = 2, pointsize = 18, append = TRUE)
# Saving resulting table outside of the local scope.
assign(paste0("retention_tab_", current_market), df_retention)
red <- "#ff7f7f"
# Preparing retention table.
graph <- knitr::kable(df_retention, align = "c") %>%
kable_styling(full_width = T, font_size = 16) %>%
column_spec(1, width = "2in", bold = TRUE,
color = "gray", background = white) %>%
column_spec(2, width = "1.5in", bold = TRUE,
color = "gray", background = white) %>%
column_spec(3, bold = TRUE, color = white, background = red) %>%
column_spec(4, bold = TRUE, color = white,
background = c(red, red, white)) %>%
column_spec(5, bold = TRUE, color = white,
background = c(red, white, white)) %>%
column_spec(1:2, border_left = "1px solid lightgray",
border_right = "1px solid lightgray") %>%
column_spec(3:5, border_left = "1px solid white",
border_right = "1px solid white") %>%
row_spec(1:3, extra_css = "border-bottom: 1px solid")
counter <- counter + 1
message_1 <- "Among the survey respondents in the MEX market, retentions rates are very low in all cohorts. Let us have a look at the UK market."
message_2 <- "On the UK market, churning is also impressive among respondents. Let us turn to the US market."
print(graph)
cat(" \n")
cat('<br>')
cat(" \n")
if (counter < 3) {
cat(eval(parse(text = paste0("message_", counter))))
cat(" \n")
cat('<br>')
cat(" \n")
}
}| Respondent Cohort in MEX Market | Respondents in Join Quarter | Retention in Join Quarter + 1 | Retention in Join Quarter + 2 | Retention in Join Quarter + 3 |
|---|---|---|---|---|
| Cohort Starting in Q1 | 405 | 0.99 % | 0.99 % | 0.99 % |
| Cohort Starting in Q2 | 414 | 0.72 % | 0.72 % | |
| Cohort Starting in Q3 | 429 | 0.70 % |
| Respondent Cohort in UK Market | Respondents in Join Quarter | Retention in Join Quarter + 1 | Retention in Join Quarter + 2 | Retention in Join Quarter + 3 |
|---|---|---|---|---|
| Cohort Starting in Q1 | 403 | 0.25 % | 0.25 % | 0.50 % |
| Cohort Starting in Q2 | 416 | 0.72 % | 1.44 % | |
| Cohort Starting in Q3 | 474 | 1.27 % |
| Respondent Cohort in US Market | Respondents in Join Quarter | Retention in Join Quarter + 1 | Retention in Join Quarter + 2 | Retention in Join Quarter + 3 |
|---|---|---|---|---|
| Cohort Starting in Q1 | 420 | 0.71 % | 1.19 % | 0.24 % |
| Cohort Starting in Q2 | 388 | 0.00 % | 0.26 % | |
| Cohort Starting in Q3 | 399 | 0.50 % |
Test